library(data.table)
library(foreign)
library(stringr)
library(rhandsontable)
library(jsonlite)
setwd("~/Google Drive/2017-AGRA")
load("./tmp/2017-agra-aasr.RData")
#####################################################################################
# Helper - Construct variable summary
summaryTable <- function(dt) {
dt.lbl <- data.table(
varCode=names(dt),
varLabel=attr(dt, "var.labels")
)
tmp <- as.data.table(summary(dt))
tmp[, V1 := 1:7]
tmp <- dcast(tmp, V2~V1)
setnames(tmp, c("varCode", "Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.", "NA's"))
tmp[, varCode := str_trim(varCode)]
setkey(tmp, varCode)
setkey(dt.lbl, varCode)
dt.lbl <- tmp[dt.lbl]
dt.lbl <- dt.lbl[names(dt)]
dt.lbl[, `Hist.` := sapply(dt, function(x) ifelse(class(x)=="numeric", toJSON(list(values=hist(x, plot=FALSE)$counts)), toJSON(NA)))]
setcolorder(dt.lbl, c(1,9,10,2:8))
return(dt.lbl)
}
#####################################################################################
# Load dataset #1 (variables extracted for the Poverty, Land, Climate paper)
hh1 <- read.dta("./tmp/Final_SSA_hhPOV_ 6 Apr 2016.12.dta")
hh.dim <- list(hh1=dim(hh1))
# Show summaries
hh1.lbl <- summaryTable(hh1)
hh1 <- data.table(hh1)
# List surveys/years included in this set
dt1.sum <- hh1[, .(
Regions=uniqueN(svyL1Cd),
Districts=uniqueN(paste(svyL1Cd, svyL2Cd)),
Obs=.N), keyby=.(ISO3, Year=as.integer(year))]
dt1sum <- rhandsontable(dt1.sum, rowHeaders=NULL, height=600, width="auto")
rm(hh1)
# Describe variables
dt1var <- rhandsontable(hh1.lbl, rowHeaders=NULL, height=600, width="auto") %>%
hot_col("Hist.", renderer=htmlwidgets::JS("renderSparkline")) %>%
hot_cols(fixedColumnsLeft=1)
# Load dataset #2 (HarvestChoice Ag Snapshots)
hh2 <- read.dta("./tmp/Combined_Regdemovars13.12.dta")
hh.dim$hh2 <- dim(hh2)
# Summarize
hh2.lbl <- summaryTable(hh2)
hh2 <- data.table(hh2)
# List surveys/years included in this set
dt2.sum <- hh2[, .(
Regions=uniqueN(svyL1Cd),
Obs=.N), keyby=.(ISO3, Year=as.integer(year))]
dt2sum <- rhandsontable(dt2.sum, rowHeaders=NULL, height=600, width="auto")
rm(hh2)
# Describe variables
dt2var <- rhandsontable(hh2.lbl, rowHeaders=NULL, height=600, width="auto") %>%
hot_col("Hist.", renderer=htmlwidgets::JS("renderSparkline")) %>%
hot_cols(fixedColumnsLeft=1)
# Load dataset #3 (Beliyou's Resilience Study)
hh3 <- read.dta("./tmp/Combined_4_Mel.12.dta")
hh.dim$hh3 <- dim(hh3)
# Summarize
hh3.lbl <- summaryTable(hh3)
hh3 <- data.table(hh3)
# List surveys/years included in this set
dt3.sum <- hh3[, .(
Regions=uniqueN(region),
Districts=uniqueN(paste(region, district)),
Obs=.N), keyby=.(ISO3, Year=as.integer(year), round)]
dt3sum <- rhandsontable(dt3.sum, rowHeaders=NULL, height=600, width="auto")
rm(hh3)
# Describe variables
dt3var <- rhandsontable(hh3.lbl[1:150], rowHeaders=NULL, height=600, width="auto") %>%
hot_col("Hist.", renderer=htmlwidgets::JS("renderSparkline")) %>%
hot_cols(fixedColumnsLeft=1)
# Load dataset #4 (SSA Poverty)
hh4 <- read.dta("./tmp/SSApoverty_Dist_forGWR.12.dta")
hh.dim$hh4 <- dim(hh4)
# Keep labels
hh4.lbl <- data.table(varCode=names(hh4), varLabel=attr(hh4, "var.labels"))
# Remove dummy vars
hh4 <- hh4[, !names(hh4) %like% "ctry"]
hh4 <- hh4[, !names(hh4) %like% "yr"]
hh4 <- hh4[, !names(hh4) %like% "fs"]
setkey(hh4.lbl, varCode)
hh4.lbl <- hh4.lbl[names(hh4)]
attr(hh4, "var.labels") <- hh4.lbl$varLabel
# Correct types
for (i in names(hh4)) if (class(hh4[[i]])=="numeric") hist(hh4[[i]], plot=F)
hh4[[i]] <- as.character(hh4[[i]])
# Summarize
hh4.lbl <- summaryTable(hh4)
hh4 <- data.table(hh4)
# List surveys/years included in this set
dt4.sum <- hh4[, .(
Regions=uniqueN(svyL1Cd),
Districts=uniqueN(paste(svyL1Cd, svyL2Cd)),
Obs=.N), keyby=.(ISO3, Year=as.integer(year))]
dt4sum <- rhandsontable(dt4.sum, rowHeaders=NULL, height=600, width="auto")
rm(hh4)
# Describe variables
dt4var <- rhandsontable(hh4.lbl, rowHeaders=NULL, height=600, width="auto") %>%
hot_col("Hist.", renderer=htmlwidgets::JS("renderSparkline")) %>%
hot_cols(fixedColumnsLeft=1)
# Load GHA GLSS rounds (Eduardo)
# Save workspace
rm(i, x, tmp)
save.image("./tmp/2017-agra-aasr.RData")
This study is a prospective chapter in AGRA Africa Agriculture Status Report (AASR) 2017.
The overall objective of the AASR report is to (Hazell, 2017):
The study should provide a context-setting chapter that attempts to characterize, scale, locate, point to prioritization of high-level smallholder commercialization strategies, opportunities and challenges. A proposed approach is to chracterize SHFs and frame business strategies within a 2-by-2 domain framework (low/high rainfed ag potential across low/high market access areas). This approach assumes that agricultural development and adaptation strategies are largely driven by pre-existing bio-physical and spatial conditions.
We propose then focusing on around half a dozen countries for which we have recent/accessible microdata (e.g. LSMS-ISA and maybe AGRA baseline surveys) to look more closely, in the same 2x2 domain framework, at specific farm household and macro characteristics to do two things:
Key farm and household-level variables to look at may include:
Many (unpublished and mostly country-level) studies have attempted to derive farmer characteristics and farm typologies across geospatial dimensions (see e.g. refs below). IFPRI in particular generated cross-country comparable household variables (derived from World Bank LSMS-ISA panels, Demographic and Health Surveys, and/or other large-scale household surveys and agricultural census).
Inventory and summarize the latest versions of IFPRI’s harmonized variables. Some of these variables are also available (summarized) across districts and/or regions and/or gender.
This dataset contains 395 variables and 220460 observations. The breakdown across countries and survey years is as follows, and variables are further described in the next table. Many of these variables are also documented in Azzarri, Signorelli, & Bacou (2016).
dt1sum
dt1var
My copy of the household-level file is corrupt (would need to contact IFPRI directly) but the variables are similar in the region-level file.
This dataset contains 86 variables. The breakdown across countries and survey years is as follows, and variables are further described in the next table.
dt2sum
dt2var
This dataset contains 2670 variables and 101333 observations. The breakdown across countries and survey years is as follows, and variables are further described in the next table.
dt3sum
This dataset also includes over 2,000 constructed climatic variables (monthly SPEI, temperature, rainfall, PDSI, etc.). To save space they are not shown in the variable summary below.
dt3var
This dataset contains 350 variables and 1650 observations. The breakdown across countries and survey years is as follows, and variables are further described in the next table. This is a district-level dataset (IFPRI holds the hh-level variables).
dt4sum
dt4var
IFPRI holds cross-country harmonized variables from the latest DHS. Complete metadata is in a DHS Google Sheet. We would need to request access to the CHILD and WOMAN recodes, I only have the STRATA summaries.
(as needed). Eduardo has 3 rounds (2005, 2008, 2012) of harmonized variables focused on wages/productivity.
(as needed)
In the following we define smallholding as a farm with total cultivated area less or equal to 4 ha. We look at the distributions of the following variables. Note that all value variables have been generated using the RIGA method.
# Load data extracts from the 2016 NGA Segmentation
setwd("~/Google Drive/2016-BMGF-Segmentation/NGA")
load("./temp/nga_bmgf_seg_1.RData")
setwd("~/Google Drive/2017-AGRA")
# SHF 4ha dummy
farm[, croparea_4ha := factor(croparea <= 4, labels=c("> 4 ha", "<= 4 ha"))]
# Merge into `ghs`
setkey(ghs, hhid)
setkey(farm, hhid)
ghs[farm, croparea_4ha := croparea_4ha]
ghs[farm, croparea := croparea]
ghs[is.na(croparea_4ha), .N]
ghs[is.na(croparea), .N]
# Commercialization rate
ghs[, comm_rate_crop := nona(cropsold)/(nona(cropsold)+nona(cropownimp))]
summary(ghs$comm_rate_crop)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 0.0000 0.0000 0.0005 0.1037 0.0072 1.0000 1778
# Define survey design (use last GHS wave only)
ghs.svy <- svydesign(~hhid, strata=~zone+rural, weights=~wt_wave2, data=ghs[!is.na(wt_wave2)])
farm.svy <- svydesign(~hhid, strata=~zone+sector, weights=~wt_wave2, data=farm[!is.na(wt_wave2)])
# Subset survey sample to SHF
summary(ghs$croparea_4ha)
# > 4 ha <= 4 ha NA's
# 94 2847 1939
ghs[is.na(croparea) & TLU_total > 0, .N]
# 142
ghs.svy.shf <- subset(ghs.svy, croparea <= 4)
svymean(~farm_hh, ghs.svy, na.rm=T)
# 57%
svymean(~croparea_4ha, ghs.svy, na.rm=T)
# 97%
svytotal(~farm_hh, ghs.svy, na.rm=T)
# total SE
# farm_hhFALSE 14069666 329673
# farm_hhTRUE 18666138 276494
svytotal(~farm_hh, ghs.svy.shf, na.rm=T)
# total SE
# farm_hhFALSE 0 0
# farm_hhTRUE 16111650 248682
svytotal(~hhsize, ghs.svy.shf, na.rm=T)
# total SE
# hhsize 106640203 1923843
# Quick compare `farmarea` and `croparea`
svg("./out/nga-2012_hist-area.svg", family="Roboto Condensed", height=4)
par(mfrow=c(1,3))
svyhist(~farmarea, ghs.svy, main="Farm Area (ha)", xlim=c(0,10))
lines(svysmooth(~farmarea, ghs.svy, bandwidth=1), lwd=1, col="red")
svyhist(~croparea, ghs.svy, main="Cultivated Area (ha)", xlim=c(0,10))
svyboxplot(farmarea~croparea_4ha, ghs.svy, main="Farm Area (ha)")
dev.off()
# croparea
svg("./out/nga-2012_hist-croparea.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~croparea, ghs.svy.shf, main="Cultivated Area (ha)", xlab="ha")
lines(svysmooth(~croparea, ghs.svy.shf, bandwidth=1), lwd=1, col="red")
plot(svycdf(~croparea, ghs.svy.shf), main="Cultivated Area (ha)", xlab="ha")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# farmarea
svg("./out/nga-2012_hist-farmarea.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~farmarea, ghs.svy.shf, main="Farm Size (ha)", xlab="ha")
lines(svysmooth(~croparea, ghs.svy.shf, bandwidth=1), lwd=1, col="red")
plot(svycdf(~farmarea, ghs.svy.shf), main="Farm Size (ha)", xlab="ha")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# comm_rate_crop
svg("./out/nga-2012_hist-comm_rate_crop.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~comm_rate_crop, ghs.svy.shf, main="Crop Commercialization", xlab="share", freq=T)
lines(svysmooth(~comm_rate_crop, ghs.svy.shf, bandwidth=.1), lwd=1, col="red")
plot(svycdf(~comm_rate_crop, ghs.svy.shf), main="Crop Commercialization", xlab="share")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# lstocksold
svg("./out/nga-2012_hist-lstocksold.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~lstocksold, ghs.svy.shf, main="Sales of Livestock Products", xlab="LCU")
#lines(svysmooth(~lstocksold, ghs.svy.shf), lwd=1, col="red")
plot(svycdf(~lstocksold, ghs.svy.shf), main="Sales of Livestock Products", xlab="LCU")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# TLU_total
svg("./out/nga-2012_hist-TLU_total.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~TLU_total, ghs.svy.shf, main="Total TLU", xlab="", breaks=seq(0,251,5), xlim=c(0,50))
#lines(svysmooth(~TLU_total, ghs.svy.shf), lwd=1, col="red")
plot(svycdf(~TLU_total, ghs.svy.shf), main="Total TLU", xlab="")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# agshare
svg("./out/nga-2012_hist-agshare.svg", family="Roboto Condensed", height=5)
par(mfrow=c(1,2))
svyhist(~agshare, ghs.svy.shf, main="Share of Farm Income", xlab="share")
lines(svysmooth(~agshare, ghs.svy.shf, bandwidth=0.1), lwd=1, col="red")
plot(svycdf(~agshare, ghs.svy.shf), main="Share of Farm Income", xlab="share")
abline(h=0.5, lty=2, col="red")
abline(h=0, lty=2, col="grey70")
abline(h=1, lty=2, col="grey70")
dev.off()
# Gross ag income
tmp <- ghs[!is.na(wt_wave2) & farmarea<=4, sum(aggross*wt_wave2, na.rm=T)/1e+9, keyby=croparea <= 4]
# Livestock TLU
tmp <- ghs[!is.na(wt_wave2), sum(TLU_total*wt_wave2, na.rm=T), keyby=croparea <= 4]
# Cultivated area
tmp <- farm[!is.na(wt_wave2), sum(croparea*wt_wave2, na.rm=T), keyby=croparea <= 4]
# Demographics
tmp <- ghs[!is.na(wt_wave2), lapply(.SD, weightedMean, wt_wave2, na.rm=T),
.SDcols=c("wt_wave2", "hhsize", "agehead", "lithead", "eduyears", "numchildren",
"equiv", "females", "males", "sh_ed_none", "sh_ed_prim", "sh_ed_high"),
keyby=croparea <= 4][, wt_wave2 := NULL]
# SE
tmp <- svyby(~farmarea, ~croparea <= 4, ghs.svy, svymean, na.rm=T)
# Population
tmp <- ghs[!is.na(wt_wave2), sum(wt_wave2*hhsize, na.rm=T), keyby=croparea <= 4]
# Farms statistics
tmp <- ghs[!is.na(wt_wave2) & farm_hh==T, .(
f2malehh_tot=sum(femhead*wt_wave2, na.rm=T),
malehh_tot=sum(-(femhead-1)*wt_wave2, na.rm=T),
femalehh=weightedMean(femhead, wt_wave2, na.rm=T),
hasland=weighted.mean(farmarea>0, wt_wave2, na.rm=T),
farmarea=weightedMean(hectares_1_hh_farmer, wt_wave2, na.rm=T),
farmarea_med=weightedMedian(hectares_1_hh_farmer, wt_wave2, na.rm=T)
), keyby=croparea <= 4]
tmp <- farm[, .(
farmslive_tot=sum(farmslive*wt_wave2, na.rm=T),
farmslive=weightedMean(farmslive, wt_wave2, na.rm=T),
intens_idx2=weightedMean(intens_idx2, wt_wave2, na.rm=T),
intens_idx2_med=weightedMedian(intens_idx2, wt_wave2, na.rm=T)
), keyby=croparea <= 4]
# Livestock
tmp <- ghs[!is.na(wt_wave2) & totgross > 0, .(
TLU_total=sum(TLU_total, wt_wave2, na.rm=T),
TLU_hh=weightedMean(TLU_total, wt_wave2, na.rm=T),
TLU_hh_imp=weightedMean(TLU_total_imp, wt_wave2, na.rm=T)
), keyby=croparea <= 4][, wt_wave2 := NULL]
# Income
tmp <- ghs[!is.na(wt_wave2) & farm_hh==T, lapply(.SD, weightedMean, wt_wave2, na.rm=T),
.SDcols=c("wt_wave2", "agsales", "aggross", "totgross", "agshare"),
keyby=croparea <= 4][, wt_wave2 := NULL]
# Input uses
tmp <- farm[, lapply(.SD, weightedMean, wt_wave2, na.rm=T),
.SDcols=c("wt_wave2", "parcels", "sh_chempest", "sh_chemherb", "sh_fertorg",
"sh_fertinorg", "sh_seedp", "tractorown_farms"),
keyby=croparea <= 4][, wt_wave2 := NULL]
# Poverty
vars <- c("weight", names(ppp12)[names(ppp12) %like% "ppp"])
tmp <- ppp12[, lapply(.SD, weightedMean, weight, na.rm=T), .SDcols=vars,
keyby=croparea <= 4][, weight := NULL]
Below is the estimated distribution of farm size and cultivated area in the entire population.
include_graphics("../out/nga-2012_hist-area.svg")
Using the <= 4 ha definition above, for Nigeria we estimate as of 2012:
The subsample of small farmholdings is characterized as follows.
include_graphics(c(
"../out/nga-2012_hist-croparea.svg",
"../out/nga-2012_hist-farmarea.svg",
"../out/nga-2012_hist-comm_rate_crop.svg",
"../out/nga-2012_hist-lstocksold.svg",
"../out/nga-2012_hist-TLU_total.svg",
"../out/nga-2012_hist-agshare.svg"
))
For documentation purposes below are code snippets from the 2016 Farmer Segmentation for Nigeria (definitions and characterization of quadrants and livelihood zones).
#####################################################################################
# 2016.05.03 Quadrant Definitions across Livelihood Zones (from Diemuth 2016.04.06)
#####################################################################################
# We need to add a new segment vars to all key micro datasets, starting with GHS.
# Diemuth provided latest quadrant definitions.
#
# - [country]_agpotallclass_1km.tif – Agricultural potential raster with 1km cell
# resolution and same extent as ethtt_1km100khr. Created from clipping sub-Saharan Africa
# Agricultural Potential 5 minute grid vector layer from FAO & IIASA, 2007. For the
# 2x2 segmented data, 5-6 values were considered high, 1-4 low and -809-0 were
# unsuitable and -997 were water.
# -997 = Water
# -809 = Protected Areas
# -807 = Urban Areas
# -805 = Irrigated Areas
# -801 = Forest
# 0 = Land not suited for agriculture
# 1 = Land very poorly suited for pasture and at best poorly suited for rainfed crops
# 2 = Land poorly suited for pasture and at best poorly suited for rainfed crops
# 3 = Lands suited for pasture and at best poorly suited for rainfed crops
# 4 = Land suited for rainfed crops and pasture possible
# 5 = Land well suited for rainfed crops and pasture possible
# 6 = Prime land suited for rainfed crops and pasture possible
#
# - [country]_tt1km100khr_4hr.tif – Classified travel time raster with a 4 hr threshold,
# HarvestChoice, 2015
# 100 – travel time greater than 4 hours, lo access
# 200 – travel time less than or equal to 4 hours, high access
# Load the 2 rasters and classify LGAs
qd.agpot <- raster("./maps/quadrant/nga_agpotallclass_1km.tif")
qd.tt100k <- raster("./maps/quadrant/nga_tt1km100khr_4hr.tif")
proj4string(qd.agpot)
proj4string(qd.tt100k)
proj4string(geo12.pts)
unique(qd.agpot)
# [1] -997 -809 -807 -805 -801 0 1 2 3 4 5 6
summary(qd.tt100k)
# nga_tt1km100khr_4hr
# Min. 100
# 1st Qu. 100
# Median 200
# 3rd Qu. 200
# Max. 200
# NA's 0
# Note that raster tt100k is already classified.
qd.agpot.lbl <- c(
"water",
"protected area",
"urban area",
"irrigated area",
"forest",
"land not suited for agriculture",
"land very poorly suited for pasture and at best poorly suited for rainfed crops",
"land poorly suited for pasture and at best poorly suited for rainfed crops",
"land suited for pasture and at best poorly suited for rainfed crops",
"land suited for rainfed crops and pasture possible ",
"land well suited for rainfed crops and pasture possible",
"prime land suited for rainfed crops and pasture possible")
qd.agpot.lbl <- data.table(varCode=unique(qd.agpot), varLabel=qd.agpot.lbl)
# Intersect with GHS coords
tmp <- extract(qd.agpot, geo12.pts)
tmp <- data.table(hhid=geo12.pts$hhid, qd.agpot=tmp)
tmp[, qd.agpot := factor(qd.agpot, levels=qd.agpot.lbl$varCode, labels=qd.agpot.lbl$varLabel)]
tmp1 <- extract(qd.tt100k, geo12.pts)
tmp[, qd.tt100k := tmp1]
tmp[, qd.tt100k := factor(qd.tt100k, levels=c(100, 200), labels=c("ML", "MH"))]
setkey(tmp, hhid)
setkey(geo12, hhid)
geo12 <- tmp[geo12]
rm(tmp, tmp1)
# Classify hhlds across quadrants
# Rename old segment var to avoid confusion
setnames(geo12, c("qd.agpot", "qd.tt100k"), c("qd_agpot", "qd_tt100k"))
setnames(geo12, "seg_quad", "seg_quad_old")
setnames(ghs, "seg_quad", "seg_quad_old")
setnames(ppp12, "seg_quad", "seg_quad_old")
setnames(inc12, "seg_quad", "seg_quad_old")
setnames(farm, "seg_quad", "seg_quad_old")
geo12[qd_agpot %in% qd.agpot.lbl$varLabel[1:6], qd_agpot_clas := "AU"]
geo12[qd_agpot %in% qd.agpot.lbl$varLabel[7:10], qd_agpot_clas := "AL"]
geo12[qd_agpot %in% qd.agpot.lbl$varLabel[11:12], qd_agpot_clas := "AH"]
summary(factor(geo12$qd_agpot_clas))
# AH AL AU NA's
# 1968 1490 1323 117
summary(factor(geo12$qd_tt100k))
# ML MH NA's
# 750 4051 97
# Impute missing hhld coords with LGA means
# Compute LGA means from `g2` map (using dominant class)
geo12[is.na(qd_agpot_clas), .N, by=.(adm1_nm, adm2_nm)]
# => missing records in 58 LGAs
tmp <- geo12[!is.na(qd_agpot_clas), .(
qd_agpot_clas=modal(qd_agpot_clas, na.rm=T),
qd_tt100k=modal(qd_tt100k, na.rm=T)
), keyby=.(adm1_nm, adm2_nm)]
setnames(tmp, 3:4, c("qd_agpot_clas_imp", "qd_tt100k_imp"))
setkey(geo12, adm1_nm, adm2_nm)
geo12 <- tmp[geo12]
geo12[is.na(qd_agpot_clas), qd_agpot_clas := qd_agpot_clas_imp]
geo12[is.na(qd_tt100k), qd_tt100k := qd_tt100k_imp]
geo12[is.na(qd_agpot_clas), .N, by=.(adm1_nm, adm2_nm)]
# adm1_nm adm2_nm N
# 1: Rivers Bonny 10
geo12[is.na(qd_tt100k), .N, by=.(adm1_nm, adm2_nm)]
# Empty data.table (0 rows) of 3 cols: adm1_nm,adm2_nm,N
# => only 10 missing records in Bonny
# What is the dominant agpot in Bonny? Extract all LGAs for re-use.
tmp <- extract(qd.agpot, g2, fun=modal, na.rm=T)
g2$qd_agpot <- tmp
# Note that we already have median 100k travel time across LGAs in `g2$mkt100k`
# though I'm not sure it's the same source as Diemuth/Stan
# load Joe's latest travel times
tt100k <- dt[, .SD, .SDcols=c("CELL5M", "X", "Y", "tt10_100k")]
rm(dt)
tt100k <- SpatialPixelsDataFrame(tt100k[, .(X,Y)], data.frame(tt100k))
proj4string(tt100k) <- CRS("+init=epsg:4326")
tt100k <- raster(tt100k, layer="tt10_100k")
tmp <- extract(tt100k, g2, fun=median, na.rm=T)
g2$qd_tt100k <- tmp
# Export for mapping
tt100k <- crop(tt100k, g2)
tt100k <- mask(tt100k, g2)
writeRaster(tt100k, "./maps/markets/tt100k.tif")
rm(tmp)
# What is Bonny?
g2[g2$adm2_nm=="Bonny", "qd_agpot"]
# => -997, so AU
geo12[is.na(qd_agpot_clas), qd_agpot_clas := "AU"]
summary(factor(geo12$qd_agpot_clas))
# AH AL AU
# 1997 1530 1371
# Make final quadrant classification for GHS
geo12[, seg_quad := paste0(qd_tt100k, qd_agpot_clas)]
geo12[, unique(seg_quad)]
# [1] "MHAU" "MHAL" "MHAH" "MLAH" "MLAL" "MLAU"
geo12[, seg_quad := ordered(seg_quad, levels=c("MLAU", "MLAL", "MLAH", "MHAU", "MHAL", "MHAH"))]
# Merge quadrants into `ghs`, `farm` and `ppp12`
setkey(geo12, hhid)
setkey(ghs, hhid)
setkey(farm, hhid)
setkey(ppp12, hhid)
ghs$seg_quad <- geo12[ghs][, seg_quad]
farm$seg_quad <- geo12[farm][, seg_quad]
ppp12$seg_quad <- geo12[ppp12][, seg_quad]
# Re-run summary tables
# => done
#####################################################################################
# Crop preferences across quadrants/zones - GHS
#####################################################################################
# Note that Cleo generated the hh-level vars and Eduardo estimated state-level vars
ghs.plot <- read.dta("./data/NGA-GHS/NGA_GHS_Indicator_plot_2012.dta")
ghs.plot <- data.table(ghs.plot)
# Merge in segmentation vars
setkey(geo12, hhid)
setkey(ghs.crop, hhid)
setkey(ghs.plot, hhid)
ghs.crop$seg_quad <- geo12[ghs.crop][, seg_quad]
ghs.plot$seg_quad <- geo12[ghs.plot][, seg_quad]
ghs.crop$seg_lvl_6 <- geo12[ghs.crop][, seg_lvl_6]
ghs.plot$seg_lvl_6 <- geo12[ghs.plot][, seg_lvl_6]
# Share of female plot managers
tmp <- ghs.plot[, .N, by=.(hhid, wt_w2v1, manager, state, gender)]
tmp <- ghs.crop[, .(
num_fem_mgr=sum(num_fem_mgr*wt_wave2, na.rm=T)/sum(num_hh_mgr*wt_wave2, na.rm=T),
fem_mgr=weighted.mean(num_fem_mgr>0, wt_wave2, na.rm=T),
fem_earn=weighted.mean(fem_earn, wt_wave2, na.rm=T)
), keyby=seg_lvl_6]
tmp <- ghs.crop[, .(
num_fem_mgr=sum(num_fem_mgr*wt_wave2, na.rm=T)/sum(num_hh_mgr*wt_wave2, na.rm=T),
fem_mgr=weighted.mean(num_fem_mgr>0, wt_wave2, na.rm=T),
fem_earn=weighted.mean(fem_earn, wt_wave2, na.rm=T)
), keyby=seg_quad]
tmp <- ghs.crop[, .(
num_fem_mgr=sum(num_fem_mgr*wt_wave2, na.rm=T)/sum(num_hh_mgr*wt_wave2, na.rm=T),
fem_mgr=weighted.mean(num_fem_mgr>0, wt_wave2, na.rm=T),
fem_earn=weighted.mean(fem_earn, wt_wave2, na.rm=T))]
# Share of female-managed plots
tmp <- ghs.plot[, .(femplotmgr=weighted.mean(gender=="female", wt_w2v1, na.rm=T)),
keyby=seg_lvl_6]
tmp <- ghs.plot[, .(femplotmgr=weighted.mean(gender=="female", wt_w2v1, na.rm=T)),
keyby=seg_quad]
tmp <- ghs.plot[, .(femplotmgr=weighted.mean(gender=="female", wt_w2v1, na.rm=T))]
# TODO Area of female-managed plots under each crop
tmp <- ghs.plot[, lapply(.SD, sum, na.rm=T),
keyby=.(hhid, wt_w2v1, state, gender), .SDcols=96:147]
tmp <- tmp[, lapply(.SD, function(x) sum(x*wt_w2v1, na.rm=T)),
keyby=.(state, gender), .SDcols=c(2, 5:56)]
tmp[, wt_w2v1 := NULL]
tmp <- melt(tmp, id.vars=1:2)
tmp[, variable := str_replace(variable, "area_", "")]
tmp <- tmp[!is.na(gender)]
# Proportion of female-managed plots by crop
tmp <- melt(ghs.plot,
id.vars=c("seg_quad", "seg_lvl_6", "gender", "wt_w2v1"),
measure.vars=11:147)
tmp[value=="yes", value := "1"]
tmp[value=="no", value := "0"]
tmp[, value := as.numeric(value)]
tmp <- tmp[, lapply(.SD, weighted.mean, wt_w2v1), keyby=.(seg_lvl_6, gender, variable)]
# TODO Additional production stats
# TODO Also check on Sara's harmonized farm management stats
Note that some of these references are unpublished materials, do not cite.
Alvarez, S., Paas, W., Descheemaeker, K., Tittonell, P., & Groot, J. (2014). Typology construction, a way of dealing with farm diversity: General guidelines for humidtropics. Retrieved from https://cgspace.cgiar.org/bitstream/handle/10568/65374/typology_guidelines.pdf
Azzarri, C., Signorelli, S., & Bacou, M. (2016). Poverty, Land, and Climate in Africa South of the Sahara: an empirical analysis. International Food Policy Research Institute. Retrieved from https://github.com/mbacou/ssa-geospat/raw/master/docs/Azzarri%20(2016)%20Poverty%2C%20Land.pdf
Bacou, M., Magalhaes, E., Hazell, P., & Wood-Sichra, U. (2015). Ethiopia Farm Segmentation Study: Draft report for the Bill and Melinda Gates Foundation. International Food Policy Research Institute. Retrieved from https://drive.google.com/open?id=0B1o4EXFzqcCzcUJSYlNfZzF5MGs
Benin, S., & others. (2016). Agricultural productivity in Africa: Trends, patterns, and determinants. (S. Benin, Ed.). International Food Policy Research Institute. https://doi.org/10.2499/9780896298811
Diao, X., Fang, P., Magalhaes, E., Pahl, S., & Silver, J. and. (2017). Cities and rural transformation: A spatial analysis of rural youth livelihoods in Ghana. In IFPRI discussion paper (Vol. 1599). International Food Policy Research Institute. Retrieved from http://ebrary.ifpri.org/cdm/ref/collection/p15738coll2/id/13105
Douillet, M., & Toulon, A. (2014). Developing a typology of agricultural holdings for improved policy design: A preliminary case study of Malawi. Foundation for World Agriculture and Rurality. Retrieved from http://www.worldagricultureswatch.org/sites/default/files/documents/malawi_report.pdf
Hazell, P. (2013). Comparative Study of trends in Urbanization and Changes in Farm Size in Africa and Asia: Implications for Agricultural Research.
Hazell, P. (2017). Concept note - Africa Agriculture Status Report (AASR) 2017. AGRA.
Jayne, T. S. (Ed.). (2016). Africa Agriculture Status Report 2016 - Progress towards Agricultural Transformation in Africa. AGRA. Retrieved from https://agra.org/aasr2016/public/assr.pdf
Jayne, T. S., Chamberlin, J., Traub, L., Sitko, N., Muyanga, M., Yeboah, K., … others. (2015). Africa’s Changing Farmland Ownership: Causes and Consequences. In Plenary paper presented at the 29th international conference of agricultural economists, milan, italy, august 2015. Retrieved from http://pdf.usaid.gov/pdf_docs/PA00KMWX.pdf
Jayne, T., Yeboah, K., Traub, L., Muyanga, M., Chamberlin, J., & Meyer, F. (2015). How can Agricultural Development Strategies Effectively Reduce Poverty in Africa?: Ag. Learning Lunch seminar, Bill and Melinda Gates Foundation. Seattle. Retrieved from https://github.com/mbacou/ssa-geospat/raw/master/docs/Jayne%20(2015)%20Farmer%20Typology.ppt
Willy, D. K., Muyanga, M., Jayne, T., & others. (2015). Adaptation to Rising Population Density: Voices from Rural Kenya. WPS (Vol. 54/2015). Tegemeo Institute of Agricultural Policy and Development. Retrieved from https://ideas.repec.org/p/ags/egtewp/208250.html